/*
Copyright 2008-2009 Elöd Egyed-Zsigmond, Cyril Laitang
Copyright 2009-2011 Samuel Gesche

This file is part of IPRI News Analyzer.

IPRI News Analyzer is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

IPRI News Analyzer is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with IPRI News Analyzer.  If not, see <http://www.gnu.org/licenses/>.
*/

package proc.rss;

import data.base.Config;
import data.base.Database;
import data.base.NoBaseException;
import data.base.connectors.RSSFeedDatabase;
import data.structures.rss.RSSFeedInfoExtended;
import data.structures.rss.BaseRSSItem;
import data.structures.rss.RSSItem;
import data.structures.tagging.LemmaVector;

import proc.tagging.TreeTagger;
import proc.text.Codecs;
import proc.text.XMLCleaner;
import proc.text.Out;

import com.sun.syndication.feed.synd.SyndEntryImpl;

import javax.swing.Timer;

import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;

import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;

import java.text.SimpleDateFormat;

public class RSSSaver {

    // Database handle handed to every connector this class creates.
    private Database theDB;
    // Number of new entries counted by readFeed() since this object was created.
    private int totalItemCount = 0;
    // Current worker thread; created in the constructor and launched by start().
    private Traitement traitements;
    // Whether the worker lemmatises titles and descriptions before storing them.
    private boolean lemmatisation = true;

    // Watchdog: every 1000 ms, if the current worker reports more than 60000 ms
    // of execution, ask it to stop and replace it with a new max-priority worker.
    // NOTE(review): the replacement worker is not started here, and no call to
    // verifTraitement.start() is visible in this class -- confirm whether this
    // watchdog is meant to be armed.
    private Timer verifTraitement = new Timer(1000, new ActionListener(){
        public void actionPerformed(ActionEvent ae){
            if(traitements.getMillisExecution()>60000){
                traitements.stoppe();
                traitements = new Traitement();
                traitements.setPriority(Thread.MAX_PRIORITY);
            }
        }
    });

    public RSSSaver(Database db) {
        theDB = db;
        traitements = new Traitement();
        traitements.setPriority(Thread.MAX_PRIORITY);
    }

    /**
     * Recomputes the lemmas of an item's title and description, stores them on
     * the item, updates the item through {@link RSSFeedDatabase} and registers
     * the lemma vectors with an {@link RSSLemmatizer}.
     *
     * @param item item whose title and description are tagged again
     * @throws NoBaseException declared by the database connectors used here
     */
    public void reLemmatise(RSSItem item) throws NoBaseException {
        RSSLemmatizer lemmatizer = new RSSLemmatizer(theDB);
        TreeTagger tagger = new TreeTagger();

        LemmaVector titleLemmas = tagger.processText(item.getTitle());
        LemmaVector descLemmas = tagger.processText(item.getDescription());
        item.setTitleLemmes(titleLemmas.toLemmas());
        item.setDescLemmes(descLemmas.toLemmas());

        new RSSFeedDatabase(theDB).UpdateLemmas(item);
        lemmatizer.addLemme(item.getId(), titleLemmas, descLemmas);
    }

    /** Turns lemmatisation on for entries processed from now on. */
    public void activeLemmatisation() {
        this.lemmatisation = true;
    }

    /** Turns lemmatisation off for entries processed from now on. */
    public void desactiveLemmatisation() {
        this.lemmatisation = false;
    }

    /**
     * @return {@code true} when lemmatisation is currently enabled
     */
    public boolean getStatutLemmatisation() {
        return this.lemmatisation;
    }

    /**
     * Loads the extended feed list from the database and returns it sorted
     * with {@link TrieurDisponibiliteFlux}.
     *
     * @return the sorted feeds as a new array
     * @throws NoBaseException declared by the database connector used here
     */
    public RSSFeedInfoExtended[] getFeedsList() throws NoBaseException {
        Vector<RSSFeedInfoExtended> stored = new RSSFeedDatabase(theDB).GetExtendedRSSFeedList();
        RSSFeedInfoExtended[] sorted = stored.toArray(new RSSFeedInfoExtended[stored.size()]);
        Arrays.sort(sorted, new TrieurDisponibiliteFlux());
        return sorted;
    }

    /**
     * Returns the given feeds as a new array sorted with
     * {@link TrieurDisponibiliteFlux}; the vector itself is not modified.
     *
     * @param preListe feeds to sort
     * @return the sorted feeds as a new array
     */
    public RSSFeedInfoExtended[] getFeedsList(Vector<RSSFeedInfoExtended> preListe) {
        RSSFeedInfoExtended[] sorted = new RSSFeedInfoExtended[preListe.size()];
        preListe.copyInto(sorted);
        Arrays.sort(sorted, new TrieurDisponibiliteFlux());
        return sorted;
    }
    
    // Outcome text of the most recent readFeed() call: readFeed() fills exactly
    // one of the two and resets the other to the empty string.
    private String readErrorMessage = "";
    private String readSuccessMessage = "";

    /**
     * Parses one feed, queues the entries accepted by
     * {@link #isToInsert(BaseRSSItem, Date)} for the worker thread, records a
     * success or error message and logs the read.
     *
     * <p>The accepted entries are queued once, after the whole feed has been
     * scanned. The previous code re-queued the accumulated list on every loop
     * iteration, which was quadratic and could put back an entry the worker had
     * already picked.
     *
     * @param feed feed to read; its {@code ID} must be a decimal integer string
     * @return the number of newly queued entries, or {@code -1} when parsing
     *         raised an {@link RSSParsingException}
     * @throws NoBaseException declared by the database connector used here
     */
    public int readFeed(RSSFeedInfoExtended feed) throws NoBaseException {
        RSSFeedDatabase rS = new RSSFeedDatabase(theDB);
        int newItemCount = 0;
        try {
            BaseRSSItem[] entryList = RSSParser.lectureFlux(feed);

            Vector<BaseRSSItem> entries = new Vector<BaseRSSItem>();
            int ifeedid = Integer.parseInt(feed.ID);
            Date lastDate = rS.GetFeedLastEntryDate(ifeedid);

            // Publication-date range of everything the feed returned (accepted
            // or not); only used for the success message.
            long oldest = Long.MAX_VALUE;
            long newest = 0;
            for (int i = 0; i < entryList.length; i++) {
                BaseRSSItem entry = entryList[i];
                if (isToInsert(entry, lastDate)) {
                    entries.addElement(entry);
                    newItemCount++;
                    totalItemCount++;
                }
                long published = entry.getPubDate().getTime();
                oldest = Math.min(oldest, published);
                newest = Math.max(newest, published);
            }

            // Queue the accepted entries in a single batch.
            if (!entries.isEmpty()) {
                insert(entries.toArray(new BaseRSSItem[entries.size()]));
            }

            SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yy HH:mm");
            if(entryList.length==0){
                readSuccessMessage = "aucune entrée trouvée";
            } else {
                readSuccessMessage = entryList.length+" entrée"+(entryList.length>1?"s":"")+
                    " du "+sdf.format(new Date(oldest))+" au "+sdf.format(new Date(newest))+
                    ", dernier enregistrement du "+sdf.format(lastDate);
            }
            readErrorMessage = "";
        } catch (RSSParsingException e) {
            // A parsing failure is reported through the return value and the log.
            newItemCount = -1;
            readErrorMessage = e.getMessage();
            readSuccessMessage = "";
        }
        logRead(Integer.parseInt(feed.ID), System.currentTimeMillis(), newItemCount,
                readErrorMessage+readSuccessMessage);
        return newItemCount;
    }

    /**
     * @return the error message of the last {@code readFeed} call, or the empty
     *         string when that call succeeded
     */
    public String getReadErrorMessage() {
        return this.readErrorMessage;
    }

    /**
     * @return the summary message of the last {@code readFeed} call, or the
     *         empty string when that call failed
     */
    public String getReadSuccessMessage() {
        return this.readSuccessMessage;
    }

    /**
     * Forwards a feed-read record to {@link RSSFeedDatabase#logRead}.
     *
     * @param idFlux   feed identifier
     * @param date     time of the read, in milliseconds
     * @param newItems number of new items found (readFeed passes -1 on failure)
     * @param error    message text to store with the record
     * @throws NoBaseException declared by the database connector used here
     */
    public void logRead(int idFlux, long date, int newItems, String error) throws NoBaseException {
        new RSSFeedDatabase(theDB).logRead(idFlux, date, newItems, error);
    }

    /** Starts the current worker thread. */
    public void start() {
        this.traitements.start();
    }
    // Entries waiting to be stored; filled by insert() and drained by pioche().
    private Set<Entree> entrees = new HashSet<Entree>();
    // Monitor held by insert() and pioche() while they touch the pending set.
    private final Object lock = new Object();

    /*private void insert(SyndEntryImpl[] entries, int ifeedid) {
        synchronized (lock) {
            //System.out.println("Insertion de "+entries.length+" entrées (flux "+ifeedid+")");
            for (int i = 0; i < entries.length; i++) {
                entrees.add(new Entree(entries[i], ifeedid));
                //System.out.println("   Inséré : "+entries[i].getTitle());
            }
        }
        for (int i = 0; i < traitements.length; i++) {
            traitements[i].interrupt();
        }
    }*/

    /**
     * Adds the given items to the pending set while holding the lock.
     *
     * @param entries items to queue for the worker thread
     */
    private void insert(BaseRSSItem[] entries) {
        synchronized (lock) {
            for (BaseRSSItem entry : entries) {
                entrees.add(new Entree(entry));
            }
        }
        // The worker is deliberately not interrupted here: interruption is now
        // used for something else, and up to 5 s of latency when the queue was
        // empty is not a problem.
    }

    /**
     * Removes and returns the pending entry with the earliest publication date.
     * The set must not be empty: the first {@code next()} call throws otherwise.
     *
     * @return the oldest pending entry
     */
    private Entree pioche() {
        synchronized (lock) {
            Iterator<Entree> pending = entrees.iterator();
            Entree oldest = pending.next();
            while (pending.hasNext()) {
                Entree candidate = pending.next();
                if (candidate.getDate().before(oldest.getDate())) {
                    oldest = candidate;
                }
            }
            entrees.remove(oldest);
            return oldest;
        }
    }

    /**
     * Returns the number of entries still waiting to be stored.
     *
     * <p>The pending set is a plain {@code HashSet} modified under {@code lock}
     * by the feed reader and the worker, so the size is read under the same
     * lock.
     *
     * @return the current size of the pending set
     */
    public int getEntreesEnReserve() {
        synchronized (lock) {
            return entrees.size();
        }
    }

    /**
     * Applies the date-based acceptance test to a ROME entry, using its
     * published date.
     *
     * @param entry    entry to check
     * @param lastDate date of the last stored entry of the feed
     * @return {@code true} when the entry should be stored
     * @throws NoDateException when the entry has no published date
     */
    boolean isToInsert(SyndEntryImpl entry, Date lastDate) throws NoDateException {
        Date published = entry.getPublishedDate();
        return isToInsert(published, lastDate);
    }

    /**
     * Applies the date-based acceptance test to a parsed item; an item without
     * a publication date is rejected instead of raising.
     *
     * @param entry    item to check
     * @param lastDate date of the last stored entry of the feed
     * @return {@code true} when the item should be stored
     */
    boolean isToInsert(BaseRSSItem entry, Date lastDate) {
        try {
            return isToInsert(entry.getPubDate(), lastDate);
        } catch (NoDateException nde) {
            // Not expected in theory with RSSParser, which already raises many
            // exceptions of its own.
            return false;
        }
    }

    /**
     * Decides from its publication date whether an entry should be stored.
     * The date must be at least one second after both {@code lastDate} and the
     * configured corpus start, and strictly before the current time.
     *
     * @param entryPubDate publication date of the entry
     * @param lastDate     date of the last stored entry of the feed
     * @return {@code true} when all three conditions hold
     * @throws NoDateException when {@code entryPubDate} is {@code null}
     */
    boolean isToInsert(Date entryPubDate, Date lastDate) throws NoDateException {
        Date limite = new Date(Config.getDateDepartCorpus());
        if (entryPubDate == null) {
            throw new NoDateException();
        }
        long published = entryPubDate.getTime();
        // A one-second margin is used instead of Date.after(Date) because the
        // dates are sometimes a few milliseconds apart for no known reason.
        // The comparison with "now" rejects entries that are dated in the future.
        return published - lastDate.getTime() >= 1000
                && published - limite.getTime() >= 1000
                && published < System.currentTimeMillis();
    }

    /**
     * @return the number of new entries counted by {@code readFeed} so far
     */
    public int getEntryCount() {
        return this.totalItemCount;
    }

    /**
     * Worker thread that takes pending entries one at a time, cleans their
     * title and description, optionally lemmatises them and stores them through
     * {@link RSSFeedDatabase}. When nothing is pending it sleeps for five
     * seconds before polling again.
     *
     * <p>Any exception raised while processing ends the loop; it is reported
     * through {@code Out.printErreur} instead of being dropped silently.
     */
    class Traitement extends Thread {

        // Written by one thread and read by another (stoppe() and
        // getMillisExecution() are called from the watchdog timer, toString()
        // by any caller), hence volatile.
        private volatile boolean enCours = false;
        private volatile boolean stoppe = false;
        private volatile long dernierDepart = System.currentTimeMillis();

        /**
         * @return milliseconds elapsed since this worker was created or, once
         *         it runs, since {@code run()} began
         */
        public long getMillisExecution(){
            return System.currentTimeMillis() - dernierDepart;
        }

        /**
         * Asks the worker to finish: the loop ends at its next check, and an
         * entry prepared but not yet stored is put back in the pending set.
         */
        public void stoppe(){
            stoppe = true;
        }

        /** Tells, under the lock, whether at least one entry is pending. */
        private boolean entreesDisponibles() {
            synchronized (lock) {
                return !entrees.isEmpty();
            }
        }

        @Override
        public void run() {
            try {
                dernierDepart = System.currentTimeMillis();
                RSSFeedDatabase rS = new RSSFeedDatabase(theDB);
                RSSLemmatizer rrs_lem = new RSSLemmatizer(theDB);
                TreeTagger tt = new TreeTagger();
                try {
                    while (!stoppe) {
                        if (!entreesDisponibles()) {
                            enCours = false;
                            try {
                                Thread.sleep(5000);
                            } catch (InterruptedException ignored) {
                                // Deliberately ignored: waking up early only
                                // means polling the pending set sooner.
                            }
                            continue;
                        }

                        Entree e = pioche();
                        enCours = true;
                        // Snapshot so one entry is handled consistently even if
                        // the setting changes meanwhile.
                        boolean lemmatisationActive = lemmatisation;
                        BaseRSSItem item = e.getEntree();
                        int ifeedid = e.getIdFlux();
                        // Re-check against the database: the feed's last stored
                        // date may have moved since the entry was queued.
                        Date lastDate = rS.GetFeedLastEntryDate(ifeedid);
                        if (isToInsert(item, lastDate)) {
                            String desc = item.getDescription();
                            String titre = Codecs.deHTMLize(XMLCleaner.xmlToText(item.getTitle()));
                            desc = Codecs.deHTMLize(XMLCleaner.xmlToText(desc));
                            String url = item.getLink();
                            if (url == null) {
                                url = "";
                            }
                            if (url.equals("")) {
                                Out.printErreur("Pas d'URL dans l'article " + Codecs.desEscapeHTML(titre) +
                                        " (flux " + rS.getFeedURL(e.getIdFlux()) + ")");
                            }

                            LemmaVector lemmasTitle = null;
                            LemmaVector lemmasDesc = null;
                            if (lemmatisationActive) {
                                lemmasTitle = tt.processText(titre);
                                lemmasDesc = tt.processText(desc);
                            }
                            EntreeBase e2 = new EntreeBase(ifeedid,
                                    titre, desc,
                                    url, item.getPubDate(),
                                    lemmasTitle, lemmasDesc);

                            if (!stoppe) {
                                if (lemmatisationActive) {
                                    Out.printInfo("Stockage de l'article : " +
                                            Codecs.desEscapeHTML(Codecs.escapeHTML(e2.getTitle())) +
                                            " (flux " + rS.getFeedName(e2.getId()) + ", " + (e2.getLemmaTitle().getLemmas().size() +
                                            e2.getLemmaDesc().getLemmas().size()) + " lemmes, daté du " + e2.getPubDate() + ")...");
                                    rS.InsertEntry(e2.getId(), e2.getTitle(),
                                            e2.getDesc(),
                                            e2.getLink(), e2.getPubDate(),
                                            e2.getLemmaTitle().toLemmas(),
                                            e2.getLemmaDesc().toLemmas());
                                    // Attach the lemma list to the row just inserted.
                                    int itemId = rS.getLastInsertEntry();
                                    rrs_lem.addLemme(itemId, e2.getLemmaTitle(), e2.getLemmaDesc());
                                } else {
                                    Out.printInfo("Stockage de l'article : " +
                                            Codecs.desEscapeHTML(Codecs.escapeHTML(e2.getTitle())) +
                                            " (flux " + rS.getFeedName(e2.getId()) + ", " +
                                            " non lemmatisé, daté du " + e2.getPubDate() + ")...");
                                    rS.InsertEntry(e2.getId(), e2.getTitle(),
                                            e2.getDesc(),
                                            e2.getLink(), e2.getPubDate());
                                }
                            } else {
                                // Stop requested while preparing the entry: put
                                // it back (under the lock) so it is not lost.
                                synchronized (lock) {
                                    entrees.add(e);
                                }
                                Out.printErreur("Temps de traitement trop long : " + e.getEntree().toString());
                            }
                        }
                        enCours = false;
                    }
                } catch (Exception ex) {
                    // The loop cannot continue; make the failure visible.
                    Out.printErreur("Arrêt du traitement des articles : " + ex);
                }
            } catch(NoBaseException nbe){
                Out.printErreur("Traitement des articles impossible : " + nbe);
            } finally {
                // The thread is ending, so it is no longer processing anything.
                enCours = false;
            }
        }

        /** @return {@code "true"} while an entry is being processed, else {@code "false"} */
        @Override
        public String toString() {
            return "" + enCours;
        }
    }
}


/**
 * Wrapper around a parsed RSS item used as an element of the pending set.
 * Equality and hash code are delegated to the wrapped item.
 */
class Entree {

    private final BaseRSSItem entree;

    /**
     * @param entree the item to wrap
     */
    public Entree(BaseRSSItem entree) {
        this.entree = entree;
    }

    /** @return the wrapped item */
    public BaseRSSItem getEntree() {
        return entree;
    }

    /** @return the publication date of the wrapped item */
    public Date getDate() {
        return entree.getPubDate();
    }

    /** @return the feed identifier of the wrapped item */
    public int getIdFlux() {
        return entree.getIdFluxRSS();
    }

    /**
     * Two wrappers are equal when their wrapped items are equal. Returns
     * {@code false} for {@code null} and for objects of another type; the
     * previous cast-and-catch version threw {@code NullPointerException} on
     * {@code null}.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Entree)) {
            return false;
        }
        return ((Entree) o).entree.equals(entree);
    }

    @Override
    public int hashCode() {
        return entree.hashCode();
    }
}

/**
 * Immutable holder for an entry ready to be stored: feed id, cleaned title and
 * description, link, publication date and the optional lemma vectors.
 *
 * <p>Equality is defined on id, title and publication date. The hash code is
 * computed from the same three fields so that equal objects always share a
 * hash code; it was previously derived from id, title, description and link.
 */
class EntreeBase {

    private final int id;
    private final String title;
    private final String desc;
    private final String link;
    private final Date pubDate;
    private final LemmaVector lemmaTitle;
    private final LemmaVector lemmaDesc;

    /**
     * @param id         identifier returned by {@link #getId()} (the caller in
     *                   this file passes the feed id)
     * @param title      cleaned title
     * @param desc       cleaned description
     * @param link       entry URL
     * @param pubDate    publication date
     * @param lemmaTitle lemmas of the title, or {@code null} when not lemmatised
     * @param lemmaDesc  lemmas of the description, or {@code null} when not lemmatised
     */
    public EntreeBase(int id, String title, String desc, String link, Date pubDate,
            LemmaVector lemmaTitle, LemmaVector lemmaDesc) {
        this.id = id;
        this.title = title;
        this.desc = desc;
        this.link = link;
        this.pubDate = pubDate;
        this.lemmaTitle = lemmaTitle;
        this.lemmaDesc = lemmaDesc;
    }

    public String getLink() {
        return link;
    }

    public String getDesc() {
        return desc;
    }

    public int getId() {
        return id;
    }

    public LemmaVector getLemmaDesc() {
        return lemmaDesc;
    }

    public LemmaVector getLemmaTitle() {
        return lemmaTitle;
    }

    public Date getPubDate() {
        return pubDate;
    }

    public String getTitle() {
        return title;
    }

    /** Null-safe equality of two references. */
    private static boolean egal(Object a, Object b) {
        return a == null ? b == null : a.equals(b);
    }

    @Override
    public boolean equals(Object o) {
        if(!(o instanceof EntreeBase)){
            return false;
        }
        EntreeBase e = (EntreeBase) o;
        return e.id == id && egal(e.title, title) && egal(e.pubDate, pubDate);
    }

    @Override
    public int hashCode() {
        // Uses exactly the fields compared by equals().
        int h = 17;
        h = 31 * h + id;
        h = 31 * h + (title == null ? 0 : title.hashCode());
        h = 31 * h + (pubDate == null ? 0 : pubDate.hashCode());
        return h;
    }
}

/**
 * Checked exception raised when an entry has no publication date.
 * Offers the four standard {@link Exception} constructors.
 */
class NoDateException extends Exception {

    /** Creates the exception with neither message nor cause. */
    public NoDateException() {
        super();
    }

    /**
     * @param message detail message
     */
    public NoDateException(String message) {
        super(message);
    }

    /**
     * @param cause underlying cause
     */
    public NoDateException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public NoDateException(String message, Throwable cause) {
        super(message, cause);
    }
}

/**
 * Orders feeds so that those with an empty URL come after those with a URL;
 * feeds in the same group are ordered by name, ignoring case.
 */
class TrieurDisponibiliteFlux implements Comparator {

    /**
     * @param o1 first feed, a {@code RSSFeedInfoExtended}
     * @param o2 second feed, a {@code RSSFeedInfoExtended}
     * @return a positive value when only the first feed has an empty URL, a
     *         negative value when only the second has, otherwise the
     *         case-insensitive comparison of the names
     */
    public int compare(Object o1, Object o2) {
        RSSFeedInfoExtended first = (RSSFeedInfoExtended) o1;
        RSSFeedInfoExtended second = (RSSFeedInfoExtended) o2;
        boolean firstWithoutUrl = first.URL.equals("");
        boolean secondWithoutUrl = second.URL.equals("");
        if (firstWithoutUrl && !secondWithoutUrl) {
            return Integer.MAX_VALUE;
        }
        if (secondWithoutUrl && !firstWithoutUrl) {
            return Integer.MIN_VALUE;
        }
        return first.name.compareToIgnoreCase(second.name);
    }
}

